{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {
    "collapsed": true,
    "pycharm": {
     "name": "#%% md\n"
    }
   },
   "source": [
    "# 4 Pandas的索引操作"
   ]
  },
  {
   "cell_type": "code",
   "source": [
    "import pandas as pd\n",
    "import numpy as np"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "end_time": "2025-01-07T06:18:24.090453Z",
     "start_time": "2025-01-07T06:18:22.510983Z"
    }
   },
   "outputs": [],
   "execution_count": 1
  },
  {
   "cell_type": "code",
   "source": [
    "\n",
    "dict_data = {'A': 1,\n",
    "             'B': pd.Timestamp('20190926'),\n",
    "             'C': pd.Series(1, index=list(range(4)),dtype='float32'),\n",
    "             'D': np.array([1,2,3,4],dtype='int32'),\n",
    "             'E': [\"Python\",\"Java\",\"C++\",\"C\"],\n",
    "             'F': 'wangdao' }\n",
    "df_obj2 = pd.DataFrame(dict_data)\n",
    "print(df_obj2.index)"
   ],
   "metadata": {
    "collapsed": false,
    "pycharm": {
     "name": "#%%\n"
    },
    "ExecuteTime": {
     "end_time": "2025-01-07T06:20:22.443810Z",
     "start_time": "2025-01-07T06:20:22.440097Z"
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Index([0, 1, 2, 3], dtype='int64')\n"
     ]
    }
   ],
   "execution_count": 2
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "outputs": [],
   "source": [
    "# 索引对象的值不可变（上面代码增加）\n",
    "# df_obj2.index[0] = 2"
   ],
   "metadata": {
    "collapsed": false,
    "pycharm": {
     "name": "#%%\n"
    }
   }
  },
  {
   "cell_type": "markdown",
   "source": [
    "# 3 常见的Index种类\n",
    "•Index，索引  可以是各种类型\n",
    "•Int64Index，整数索引\n",
    "•MultiIndex，层级索引，难度较大\n",
    "•DatetimeIndex，时间戳类型"
   ],
   "metadata": {
    "collapsed": false
   }
  },
  {
   "cell_type": "code",
   "source": [
    "ser_obj = pd.Series(range(5), index = list(\"abcde\"))\n",
    "print(ser_obj)\n",
    "ser_obj.index"
   ],
   "metadata": {
    "collapsed": false,
    "pycharm": {
     "name": "#%%\n"
    },
    "ExecuteTime": {
     "end_time": "2025-01-07T06:22:32.965657Z",
     "start_time": "2025-01-07T06:22:32.959214Z"
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "a    0\n",
      "b    1\n",
      "c    2\n",
      "d    3\n",
      "e    4\n",
      "dtype: int64\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "Index(['a', 'b', 'c', 'd', 'e'], dtype='object')"
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "execution_count": 3
  },
  {
   "cell_type": "code",
   "source": [
    "# 行索引，不仅可以用索引名，可以用索引位置或来取\n",
    "print(ser_obj['b']) #索引名\n",
    "print(ser_obj[2]) #位置索引"
   ],
   "metadata": {
    "collapsed": false,
    "pycharm": {
     "name": "#%%\n"
    },
    "ExecuteTime": {
     "end_time": "2025-01-07T06:23:25.062689Z",
     "start_time": "2025-01-07T06:23:25.059887Z"
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "1\n",
      "2\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "C:\\Users\\41507\\AppData\\Local\\Temp\\ipykernel_28788\\2834633063.py:3: FutureWarning: Series.__getitem__ treating keys as positions is deprecated. In a future version, integer keys will always be treated as labels (consistent with DataFrame behavior). To access a value by position, use `ser.iloc[pos]`\n",
      "  print(ser_obj[2]) #位置索引\n"
     ]
    }
   ],
   "execution_count": 4
  },
  {
   "metadata": {
    "ExecuteTime": {
     "end_time": "2025-01-07T06:26:06.439280Z",
     "start_time": "2025-01-07T06:26:06.436401Z"
    }
   },
   "cell_type": "code",
   "source": [
    "print(ser_obj.loc['b']) #索引名\n",
    "print(ser_obj.iloc[2]) #位置索引"
   ],
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "1\n",
      "2\n"
     ]
    }
   ],
   "execution_count": 6
  },
  {
   "cell_type": "code",
   "source": [
    "# 切片索引\n",
    "print(ser_obj.iloc[1:3])  #索引位置取数据，左闭右开\n",
    "print(ser_obj.loc['b':'d'])  #记住索引名  左闭右闭"
   ],
   "metadata": {
    "collapsed": false,
    "pycharm": {
     "name": "#%%\n"
    },
    "ExecuteTime": {
     "end_time": "2025-01-07T06:27:09.288805Z",
     "start_time": "2025-01-07T06:27:09.285677Z"
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "b    1\n",
      "c    2\n",
      "dtype: int64\n",
      "b    1\n",
      "c    2\n",
      "d    3\n",
      "dtype: int64\n"
     ]
    }
   ],
   "execution_count": 8
  },
  {
   "cell_type": "code",
   "source": [
    "# 不连续索引\n",
    "print(ser_obj.iloc[[0, 2, 4]])\n",
    "print(ser_obj.loc[['a', 'e']])"
   ],
   "metadata": {
    "collapsed": false,
    "pycharm": {
     "name": "#%%\n"
    },
    "ExecuteTime": {
     "end_time": "2025-01-07T06:28:29.437819Z",
     "start_time": "2025-01-07T06:28:29.434047Z"
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "a    0\n",
      "c    2\n",
      "e    4\n",
      "dtype: int64\n",
      "a    0\n",
      "e    4\n",
      "dtype: int64\n"
     ]
    }
   ],
   "execution_count": 9
  },
  {
   "cell_type": "code",
   "source": [
    "# 布尔索引\n",
    "ser_bool = ser_obj > 2\n",
    "print(ser_obj)\n",
    "print(ser_bool)\n"
   ],
   "metadata": {
    "collapsed": false,
    "pycharm": {
     "name": "#%%\n"
    },
    "ExecuteTime": {
     "end_time": "2025-01-07T06:29:01.708147Z",
     "start_time": "2025-01-07T06:29:01.704381Z"
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "a    0\n",
      "b    1\n",
      "c    2\n",
      "d    3\n",
      "e    4\n",
      "dtype: int64\n",
      "a    False\n",
      "b    False\n",
      "c    False\n",
      "d     True\n",
      "e     True\n",
      "dtype: bool\n"
     ]
    }
   ],
   "execution_count": 10
  },
  {
   "metadata": {
    "ExecuteTime": {
     "end_time": "2025-01-07T06:29:41.135202Z",
     "start_time": "2025-01-07T06:29:41.131370Z"
    }
   },
   "cell_type": "code",
   "source": [
    "print('-'*50)\n",
    "print(ser_obj[ser_bool])\n",
    "\n",
    "print(ser_obj[ser_obj > 2]) #取出大于2的元素"
   ],
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "--------------------------------------------------\n",
      "d    3\n",
      "e    4\n",
      "dtype: int64\n",
      "d    3\n",
      "e    4\n",
      "dtype: int64\n"
     ]
    }
   ],
   "execution_count": 11
  },
  {
   "cell_type": "markdown",
   "source": [
    "## 4.4 DataFrame索引"
   ],
   "metadata": {
    "collapsed": false
   }
  },
  {
   "cell_type": "code",
   "source": [
    "import numpy as np\n",
    "df_obj = pd.DataFrame(np.random.randn(5,4),\n",
    "                      columns = ['a', 'b', 'c', 'd'])\n",
    "print(df_obj.head())"
   ],
   "metadata": {
    "collapsed": false,
    "pycharm": {
     "name": "#%%\n"
    },
    "ExecuteTime": {
     "end_time": "2025-01-07T06:30:41.908122Z",
     "start_time": "2025-01-07T06:30:41.903459Z"
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "          a         b         c         d\n",
      "0 -0.370200 -0.197750 -0.324783 -1.670661\n",
      "1 -0.450206 -0.329816 -1.398775 -0.186920\n",
      "2 -0.046347  1.052843  0.839052 -1.346694\n",
      "3 -0.742516  0.213192  0.702666 -0.120256\n",
      "4  0.391755 -1.085500  1.086962 -1.676489\n"
     ]
    }
   ],
   "execution_count": 12
  },
  {
   "cell_type": "code",
   "source": [
    "# 列索引\n",
    "print(df_obj['a']) # 返回Series类型\n",
    "print('-'*50)\n",
    "print(df_obj[['a']]) # 返回DataFrame类型\n",
    "print('-'*50)\n",
    "print(type(df_obj[['a']])) # 返回DataFrame类型"
   ],
   "metadata": {
    "collapsed": false,
    "pycharm": {
     "name": "#%%\n"
    },
    "ExecuteTime": {
     "end_time": "2025-01-07T06:31:47.772608Z",
     "start_time": "2025-01-07T06:31:47.768542Z"
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "0   -0.370200\n",
      "1   -0.450206\n",
      "2   -0.046347\n",
      "3   -0.742516\n",
      "4    0.391755\n",
      "Name: a, dtype: float64\n",
      "--------------------------------------------------\n",
      "          a\n",
      "0 -0.370200\n",
      "1 -0.450206\n",
      "2 -0.046347\n",
      "3 -0.742516\n",
      "4  0.391755\n",
      "--------------------------------------------------\n",
      "<class 'pandas.core.frame.DataFrame'>\n"
     ]
    }
   ],
   "execution_count": 15
  },
  {
   "cell_type": "markdown",
   "source": [
    "1. loc 标签索引(通过索引标签值获取数据)"
   ],
   "metadata": {
    "collapsed": false
   }
  },
  {
   "cell_type": "code",
   "source": [
    "# 标签索引 loc，建议使用loc，效率更高\n",
    "# Series\n",
    "print(ser_obj)\n",
    "print(ser_obj['b':'d'])\n",
    "print(ser_obj.loc['b':'d']) #前闭后闭\n",
    "print('-'*50)\n"
   ],
   "metadata": {
    "collapsed": false,
    "pycharm": {
     "name": "#%%\n"
    },
    "ExecuteTime": {
     "end_time": "2025-01-07T06:32:26.677967Z",
     "start_time": "2025-01-07T06:32:26.674625Z"
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "a    0\n",
      "b    1\n",
      "c    2\n",
      "d    3\n",
      "e    4\n",
      "dtype: int64\n",
      "b    1\n",
      "c    2\n",
      "d    3\n",
      "dtype: int64\n",
      "b    1\n",
      "c    2\n",
      "d    3\n",
      "dtype: int64\n",
      "--------------------------------------------------\n"
     ]
    }
   ],
   "execution_count": 16
  },
  {
   "cell_type": "code",
   "source": [
    "# DataFrame\n",
    "df_obj = pd.DataFrame(np.random.randn(5,4),\n",
    "                      columns = list('abcd'),\n",
    "                      index=list('abcde'))\n",
    "print(df_obj)\n",
    "print('-'*50)\n",
    "print(df_obj['a'])  #建议不用,拿的是列\n",
    "print('-'*50)\n",
    "print(df_obj.loc['a'])  #拿的是行\n",
    "print('-'*50)\n"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "end_time": "2025-01-07T06:32:39.637723Z",
     "start_time": "2025-01-07T06:32:39.633403Z"
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "          a         b         c         d\n",
      "a -1.178449 -0.651984 -0.285541  0.829486\n",
      "b  0.346051 -1.017756 -1.210371 -0.657400\n",
      "c -0.196890 -1.654030 -1.331606  0.175493\n",
      "d  0.557989  0.599905 -0.260700 -0.764953\n",
      "e -0.304918  1.463066 -1.895591 -1.266186\n",
      "--------------------------------------------------\n",
      "a   -1.178449\n",
      "b    0.346051\n",
      "c   -0.196890\n",
      "d    0.557989\n",
      "e   -0.304918\n",
      "Name: a, dtype: float64\n",
      "--------------------------------------------------\n",
      "a   -1.178449\n",
      "b   -0.651984\n",
      "c   -0.285541\n",
      "d    0.829486\n",
      "Name: a, dtype: float64\n",
      "--------------------------------------------------\n"
     ]
    }
   ],
   "execution_count": 17
  },
  {
   "metadata": {
    "ExecuteTime": {
     "end_time": "2025-01-07T06:34:31.685896Z",
     "start_time": "2025-01-07T06:34:31.679858Z"
    }
   },
   "cell_type": "code",
   "source": [
    "# 第一个参数索引行，第二个参数是列,loc或者iloc效率高于直接用取下标的方式，前闭后闭\n",
    "print(df_obj.loc['a':'c', 'b':'d']) #连续索引\n",
    "print(df_obj.loc[['a','c'], ['b','d']]) #不连续索引\n",
    "print(df_obj.loc[['c'],['b']]) #取一个值,返回的是DataFrame类型\n",
    "print(df_obj.loc['c','b'])  #取一个值"
   ],
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "          b         c         d\n",
      "a -0.651984 -0.285541  0.829486\n",
      "b -1.017756 -1.210371 -0.657400\n",
      "c -1.654030 -1.331606  0.175493\n",
      "          b         d\n",
      "a -0.651984  0.829486\n",
      "c -1.654030  0.175493\n",
      "         b\n",
      "c -1.65403\n",
      "-1.654029501746479\n"
     ]
    }
   ],
   "execution_count": 18
  },
  {
   "cell_type": "markdown",
   "source": [
    "## iloc 位置索引(推荐使用)"
   ],
   "metadata": {
    "collapsed": false
   }
  },
  {
   "cell_type": "code",
   "source": [
    "ser_obj\n",
    "print('-'*50)\n",
    "# Series\n",
    "print(ser_obj[1:3])\n",
    "print('-'*50)\n",
    "print(ser_obj.iloc[1:3]) # 前闭后开[)，效率高\n"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "end_time": "2025-01-07T06:37:39.951989Z",
     "start_time": "2025-01-07T06:37:39.948058Z"
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "--------------------------------------------------\n",
      "b    1\n",
      "c    2\n",
      "dtype: int64\n",
      "--------------------------------------------------\n",
      "b    1\n",
      "c    2\n",
      "dtype: int64\n"
     ]
    }
   ],
   "execution_count": 19
  },
  {
   "cell_type": "code",
   "source": [
    "df_obj"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "end_time": "2025-01-07T06:38:03.244644Z",
     "start_time": "2025-01-07T06:38:03.238749Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "          a         b         c         d\n",
       "a -1.178449 -0.651984 -0.285541  0.829486\n",
       "b  0.346051 -1.017756 -1.210371 -0.657400\n",
       "c -0.196890 -1.654030 -1.331606  0.175493\n",
       "d  0.557989  0.599905 -0.260700 -0.764953\n",
       "e -0.304918  1.463066 -1.895591 -1.266186"
      ],
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>a</th>\n",
       "      <th>b</th>\n",
       "      <th>c</th>\n",
       "      <th>d</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>a</th>\n",
       "      <td>-1.178449</td>\n",
       "      <td>-0.651984</td>\n",
       "      <td>-0.285541</td>\n",
       "      <td>0.829486</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>b</th>\n",
       "      <td>0.346051</td>\n",
       "      <td>-1.017756</td>\n",
       "      <td>-1.210371</td>\n",
       "      <td>-0.657400</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>c</th>\n",
       "      <td>-0.196890</td>\n",
       "      <td>-1.654030</td>\n",
       "      <td>-1.331606</td>\n",
       "      <td>0.175493</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>d</th>\n",
       "      <td>0.557989</td>\n",
       "      <td>0.599905</td>\n",
       "      <td>-0.260700</td>\n",
       "      <td>-0.764953</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>e</th>\n",
       "      <td>-0.304918</td>\n",
       "      <td>1.463066</td>\n",
       "      <td>-1.895591</td>\n",
       "      <td>-1.266186</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ]
     },
     "execution_count": 20,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "execution_count": 20
  },
  {
   "cell_type": "code",
   "source": [
    "\n",
    "# DataFrame，iloc是前闭后开[)\n",
    "print(df_obj)\n",
    "print('-'*50)\n",
    "print(df_obj.iloc[0:2, 0:2]) \n",
    "print('-'*50)\n",
    "print(df_obj.iloc[[0,2], [0,2]]) # 不连续索引\n",
    "print('-'*50)\n",
    "print(df_obj.iloc[0,0]) # 取一个值"
   ],
   "metadata": {
    "collapsed": false,
    "pycharm": {
     "name": "#%%\n"
    },
    "ExecuteTime": {
     "end_time": "2025-01-07T06:39:59.214328Z",
     "start_time": "2025-01-07T06:39:59.209346Z"
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "          a         b         c         d\n",
      "a -1.178449 -0.651984 -0.285541  0.829486\n",
      "b  0.346051 -1.017756 -1.210371 -0.657400\n",
      "c -0.196890 -1.654030 -1.331606  0.175493\n",
      "d  0.557989  0.599905 -0.260700 -0.764953\n",
      "e -0.304918  1.463066 -1.895591 -1.266186\n",
      "--------------------------------------------------\n",
      "          a         b\n",
      "a -1.178449 -0.651984\n",
      "b  0.346051 -1.017756\n",
      "--------------------------------------------------\n",
      "          a         c\n",
      "a -1.178449 -0.285541\n",
      "c -0.196890 -1.331606\n",
      "--------------------------------------------------\n",
      "-1.1784485351851908\n"
     ]
    }
   ],
   "execution_count": 22
  },
  {
   "cell_type": "code",
   "source": [
    "#没有设置行和列索引的DataFrame，iloc和loc的区别\n",
    "df_obj2 = pd.DataFrame(np.random.randn(5,4))\n",
    "print(df_obj2)\n",
    "print('-'*50)\n",
    "print(df_obj2.iloc[0:2]) #左闭右开 2行\n",
    "print('-'*50)\n",
    "print(df_obj2.loc[0:2]) #左闭右闭 3行"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "end_time": "2025-01-07T06:40:33.131112Z",
     "start_time": "2025-01-07T06:40:33.124858Z"
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "          0         1         2         3\n",
      "0 -2.304706  0.203760 -0.368852 -0.542549\n",
      "1 -0.391182 -1.246250 -0.449172 -1.539267\n",
      "2 -0.138114  0.643528 -0.039295 -0.591453\n",
      "3  0.137359  0.746882  1.772355  0.708370\n",
      "4  0.736893 -1.930920  1.339430 -2.409278\n",
      "--------------------------------------------------\n",
      "          0        1         2         3\n",
      "0 -2.304706  0.20376 -0.368852 -0.542549\n",
      "1 -0.391182 -1.24625 -0.449172 -1.539267\n",
      "--------------------------------------------------\n",
      "          0         1         2         3\n",
      "0 -2.304706  0.203760 -0.368852 -0.542549\n",
      "1 -0.391182 -1.246250 -0.449172 -1.539267\n",
      "2 -0.138114  0.643528 -0.039295 -0.591453\n"
     ]
    }
   ],
   "execution_count": 23
  },
  {
   "cell_type": "markdown",
   "source": [
    "# 5.对齐运算"
   ],
   "metadata": {
    "collapsed": false
   }
  },
  {
   "cell_type": "code",
   "source": [
    "import pandas as pd\n",
    "s1 = pd.Series(range(10, 20), index = range(10))\n",
    "s2 = pd.Series(range(20, 25), index = range(5))\n",
    "# Series 对齐运算\n",
    "print('s1+s2: ')\n",
    "s3=s1+s2\n",
    "print(s3)  #缺失数据默认是NaN  np.nan"
   ],
   "metadata": {
    "collapsed": false,
    "pycharm": {
     "name": "#%%\n"
    },
    "ExecuteTime": {
     "end_time": "2025-01-07T06:42:25.062370Z",
     "start_time": "2025-01-07T06:42:25.057448Z"
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "s1+s2: \n",
      "0    30.0\n",
      "1    32.0\n",
      "2    34.0\n",
      "3    36.0\n",
      "4    38.0\n",
      "5     NaN\n",
      "6     NaN\n",
      "7     NaN\n",
      "8     NaN\n",
      "9     NaN\n",
      "dtype: float64\n"
     ]
    }
   ],
   "execution_count": 24
  },
  {
   "cell_type": "code",
   "source": [
    "#两个长度不同的一维ndarray相加\n",
    "a1 = np.array([1,2,3,4,5])\n",
    "a2 = np.array([1]) # 长度为1\n",
    "print(a2.shape)\n",
    "print(a1+a2)"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "end_time": "2025-01-07T06:42:47.107746Z",
     "start_time": "2025-01-07T06:42:47.105076Z"
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "(1,)\n",
      "[2 3 4 5 6]\n"
     ]
    }
   ],
   "execution_count": 25
  },
  {
   "metadata": {
    "ExecuteTime": {
     "end_time": "2025-01-07T06:43:17.445141Z",
     "start_time": "2025-01-07T06:43:17.441719Z"
    }
   },
   "cell_type": "code",
   "source": [
    "print(s2)\n",
    "s1"
   ],
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "0    20\n",
      "1    21\n",
      "2    22\n",
      "3    23\n",
      "4    24\n",
      "dtype: int64\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "0    10\n",
       "1    11\n",
       "2    12\n",
       "3    13\n",
       "4    14\n",
       "5    15\n",
       "6    16\n",
       "7    17\n",
       "8    18\n",
       "9    19\n",
       "dtype: int64"
      ]
     },
     "execution_count": 26,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "execution_count": 26
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "True\n",
      "--------------------------------------------------\n",
      "0    30.0\n",
      "1    32.0\n",
      "2    34.0\n",
      "3    36.0\n",
      "4    38.0\n",
      "5    15.0\n",
      "6    16.0\n",
      "7    17.0\n",
      "8    18.0\n",
      "9    19.0\n",
      "dtype: float64\n",
      "0    10.0\n",
      "1    10.0\n",
      "2    10.0\n",
      "3    10.0\n",
      "4    10.0\n",
      "5   -15.0\n",
      "6   -16.0\n",
      "7   -17.0\n",
      "8   -18.0\n",
      "9   -19.0\n",
      "dtype: float64\n"
     ]
    }
   ],
   "source": [
    "print(np.isnan(s3[6]))\n",
    "print('-'*50)\n",
    "print(s2.add(s1, fill_value = 0))  #未对齐的数据将和填充值做运算\n",
    "print(s2.sub(s1, fill_value = 0))"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "end_time": "2024-04-11T03:18:59.732425700Z",
     "start_time": "2024-04-11T03:18:59.697446300Z"
    }
   }
  },
  {
   "cell_type": "code",
   "source": [
    "#df的对齐运算\n",
    "import numpy as np\n",
    "df1 = pd.DataFrame(np.ones((2,2)), columns = ['a', 'b'])\n",
    "df2 = pd.DataFrame(np.ones((3,3)), columns = ['a', 'b', 'c'])\n",
    "print(df1)\n",
    "print(df2)\n",
    "print('-'*50)\n",
    "print(df2.dtypes)\n",
    "print(df1-df2)\n",
    "print(df2.sub(df1, fill_value = 2)) #未对齐的数据将和填充值做运算"
   ],
   "metadata": {
    "collapsed": false,
    "pycharm": {
     "name": "#%%\n"
    },
    "ExecuteTime": {
     "end_time": "2025-01-07T06:45:27.892692Z",
     "start_time": "2025-01-07T06:45:27.885036Z"
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "     a    b\n",
      "0  1.0  1.0\n",
      "1  1.0  1.0\n",
      "     a    b    c\n",
      "0  1.0  1.0  1.0\n",
      "1  1.0  1.0  1.0\n",
      "2  1.0  1.0  1.0\n",
      "--------------------------------------------------\n",
      "a    float64\n",
      "b    float64\n",
      "c    float64\n",
      "dtype: object\n",
      "     a    b   c\n",
      "0  0.0  0.0 NaN\n",
      "1  0.0  0.0 NaN\n",
      "2  NaN  NaN NaN\n",
      "     a    b    c\n",
      "0  0.0  0.0 -1.0\n",
      "1  0.0  0.0 -1.0\n",
      "2 -1.0 -1.0 -1.0\n"
     ]
    }
   ],
   "execution_count": 28
  },
  {
   "cell_type": "markdown",
   "source": [],
   "metadata": {
    "collapsed": false
   }
  },
  {
   "metadata": {},
   "cell_type": "markdown",
   "source": "# 总结：没对齐的元素，默认填充NaN，对齐运算时，fill_value参数可以指定填充值。"
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 2
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython2",
   "version": "2.7.6"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 0
}
